267b4e
@@ -338,6 +338,8 @@
public Vectorizer() {
 
     String[] scratchTypeNameArray;
 
+    Set<Operator<? extends OperatorDesc>> nonVectorizedOps;
+
     VectorTaskColumnInfo() {
       partitionColumnCount = 0;
     }
@@ -355,6 +357,14 @@
public void setScratchTypeNameArray(String[] scratchTypeNameArray) {
       this.scratchTypeNameArray = scratchTypeNameArray;
     }
 
+    public void setNonVectorizedOps(Set<Operator<? extends OperatorDesc>> nonVectorizedOps) {
+      this.nonVectorizedOps = nonVectorizedOps;
+    }
+
+    public Set<Operator<? extends OperatorDesc>> getNonVectorizedOps() {
+      return nonVectorizedOps;
+    }
+
     public void transferToBaseWork(BaseWork baseWork) {
 
       String[] columnNameArray = columnNames.toArray(new String[0]);
@@ -701,6 +711,7 @@
private boolean validateMapWork(MapWork mapWork, VectorTaskColumnInfo vectorTask
           }
         }
       }
+      vectorTaskColumnInfo.setNonVectorizedOps(vnp.getNonVectorizedOps());
       return true;
     }
 
@@ -819,6 +830,7 @@
private boolean validateReduceWork(ReduceWork reduceWork,
           }
         }
       }
+      vectorTaskColumnInfo.setNonVectorizedOps(vnp.getNonVectorizedOps());
       return true;
     }
 
@@ -863,6 +875,14 @@
private void vectorizeReduceWork(ReduceWork reduceWork,
     private final MapWork mapWork;
     private final boolean isTez;
 
+    // Children of Vectorized GROUPBY that outputs rows instead of vectorized row batches.
+    protected final Set<Operator<? extends OperatorDesc>> nonVectorizedOps =
+        new HashSet<Operator<? extends OperatorDesc>>();
+
+    public Set<Operator<? extends OperatorDesc>> getNonVectorizedOps() {
+      return nonVectorizedOps;
+    }
+
     public MapWorkValidationNodeProcessor(MapWork mapWork, boolean isTez) {
       this.mapWork = mapWork;
       this.isTez = isTez;
@@ -873,7 +893,7 @@
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         Object... nodeOutputs) throws SemanticException {
       for (Node n : stack) {
         Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) n;
-        if (nonVectorizableChildOfGroupBy(op)) {
+        if (nonVectorizedOps.contains(op)) {
           return new Boolean(true);
         }
         boolean ret;
@@ -886,6 +906,12 @@
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
           LOG.info("MapWork Operator: " + op.getName() + " could not be vectorized.");
           return new Boolean(false);
         }
+        // When Vectorized GROUPBY outputs rows instead of vectorized row batches, we don't
+        // vectorize the operators below it.
+        if (isVectorizedGroupByThatOutputsRows(op)) {
+          addOperatorChildrenToSet(op, nonVectorizedOps);
+          return new Boolean(true);
+        }
       }
       return new Boolean(true);
     }
@@ -893,12 +919,24 @@
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
 
   class ReduceWorkValidationNodeProcessor implements NodeProcessor {
 
+    // Children of Vectorized GROUPBY that outputs rows instead of vectorized row batches.
+    protected final Set<Operator<? extends OperatorDesc>> nonVectorizedOps =
+        new HashSet<Operator<? extends OperatorDesc>>();
+
+    public Set<Operator<? extends OperatorDesc>> getNonVectorizeOps() {
+      return nonVectorizedOps;
+    }
+
+    public Set<Operator<? extends OperatorDesc>> getNonVectorizedOps() {
+      return nonVectorizedOps;
+    }
+
     @Override
     public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         Object... nodeOutputs) throws SemanticException {
       for (Node n : stack) {
         Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) n;
-        if (nonVectorizableChildOfGroupBy(op)) {
+        if (nonVectorizedOps.contains(op)) {
           return new Boolean(true);
         }
         boolean ret = validateReduceWorkOperator(op);
@@ -906,6 +944,12 @@
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
           LOG.info("ReduceWork Operator: " + op.getName() + " could not be vectorized.");
           return new Boolean(false);
         }
+        // When Vectorized GROUPBY outputs rows instead of vectorized row batches, we don't
+        // vectorize the operators below it.
+        if (isVectorizedGroupByThatOutputsRows(op)) {
+          addOperatorChildrenToSet(op, nonVectorizedOps);
+          return new Boolean(true);
+        }
       }
       return new Boolean(true);
     }
@@ -918,7 +962,10 @@
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
     // The vectorization context for the Map or Reduce task.
     protected VectorizationContext taskVectorizationContext;
 
-    VectorizationNodeProcessor() {
+    protected final Set<Operator<? extends OperatorDesc>> nonVectorizedOps;
+
+    VectorizationNodeProcessor(Set<Operator<? extends OperatorDesc>> nonVectorizedOps) {
+      this.nonVectorizedOps = nonVectorizedOps;
     }
 
     public String[] getVectorScratchColumnTypeNames() {
@@ -997,7 +1044,7 @@
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
 
     public MapWorkVectorizationNodeProcessor(MapWork mWork, boolean isTez,
         VectorTaskColumnInfo vectorTaskColumnInfo) {
-      super();
+      super(vectorTaskColumnInfo.getNonVectorizedOps());
       this.mWork = mWork;
       this.vectorTaskColumnInfo = vectorTaskColumnInfo;
       this.isTez = isTez;
@@ -1008,6 +1055,9 @@
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         Object... nodeOutputs) throws SemanticException {
 
       Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
+      if (nonVectorizedOps.contains(op)) {
+        return null;
+      }
 
       VectorizationContext vContext = null;
 
@@ -1031,16 +1081,6 @@
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
             + " using vectorization context" + vContext.toString());
       }
 
-      // When Vectorized GROUPBY outputs rows instead of vectorized row batchs, we don't
-      // vectorize the operators below it.
-      if (nonVectorizableChildOfGroupBy(op)) {
-        // No need to vectorize
-        if (!opsDone.contains(op)) {
-            opsDone.add(op);
-          }
-        return null;
-      }
-
       Operator<? extends OperatorDesc> vectorOp = doVectorize(op, vContext, isTez);
 
       if (LOG.isDebugEnabled()) {
@@ -1070,7 +1110,7 @@
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
     public ReduceWorkVectorizationNodeProcessor(VectorTaskColumnInfo vectorTaskColumnInfo,
             boolean isTez) {
 
-      super();
+      super(vectorTaskColumnInfo.getNonVectorizedOps());
       this.vectorTaskColumnInfo =  vectorTaskColumnInfo;
       rootVectorOp = null;
       this.isTez = isTez;
@@ -1081,6 +1121,9 @@
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         Object... nodeOutputs) throws SemanticException {
 
       Operator<? extends OperatorDesc> op = (Operator<? extends OperatorDesc>) nd;
+      if (nonVectorizedOps.contains(op)) {
+        return null;
+      }
 
       VectorizationContext vContext = null;
 
@@ -1110,16 +1153,6 @@
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       assert vContext != null;
       LOG.info("ReduceWorkVectorizationNodeProcessor process operator " + op.getName() + " using vectorization context" + vContext.toString());
 
-      // When Vectorized GROUPBY outputs rows instead of vectorized row batchs, we don't
-      // vectorize the operators below it.
-      if (nonVectorizableChildOfGroupBy(op)) {
-        // No need to vectorize
-        if (!opsDone.contains(op)) {
-          opsDone.add(op);
-        }
-        return null;
-      }
-
       Operator<? extends OperatorDesc> vectorOp = doVectorize(op, vContext, isTez);
 
       if (LOG.isDebugEnabled()) {
@@ -1267,20 +1300,24 @@
boolean validateReduceWorkOperator(Operator<? extends OperatorDesc> op) {
     return ret;
   }
 
-  public Boolean nonVectorizableChildOfGroupBy(Operator<? extends OperatorDesc> op) {
-    Operator<? extends OperatorDesc> currentOp = op;
-    while (currentOp.getParentOperators().size() > 0) {
-      currentOp = currentOp.getParentOperators().get(0);
-      if (currentOp.getType().equals(OperatorType.GROUPBY)) {
-        GroupByDesc desc = (GroupByDesc)currentOp.getConf();
-        boolean isVectorOutput = desc.getVectorDesc().isVectorOutput();
-        if (isVectorOutput) {
-          // This GROUP BY does vectorize its output.
-          return false;
-        }
-        return true;
+  private void addOperatorChildrenToSet(Operator<? extends OperatorDesc> op,
+      Set<Operator<? extends OperatorDesc>> nonVectorizedOps) {
+    for (Operator<? extends OperatorDesc> childOp : op.getChildOperators()) {
+      if (!nonVectorizedOps.contains(childOp)) {
+        nonVectorizedOps.add(childOp);
+        addOperatorChildrenToSet(childOp, nonVectorizedOps);
       }
     }
+  }
+
+  // When Vectorized GROUPBY outputs rows instead of vectorized row batches, we don't
+  // vectorize the operators below it.
+   private Boolean isVectorizedGroupByThatOutputsRows(Operator<? extends OperatorDesc> op)
+      throws SemanticException {
+    if (op.getType().equals(OperatorType.GROUPBY)) {
+      GroupByDesc desc = (GroupByDesc) op.getConf();
+      return !desc.getVectorDesc().isVectorOutput();
+    }
     return false;
   }
 
